####################################################################
####################################################################
## Replication Material
##
## Michael Jankowski and Stefan Müller
##
## michael.jankowski@uol.de | stefan.mueller@ucd.ie
## 
## Incumbency Advantage in Lower-Order PR Elections:
## Evidence from the Irish Context, 1942-2019
## 
## Electoral Studies

## File: 01b_prepare_rdd_data_general_elections.R

## See 00_description_data_and_scripts.pdf for a detailed 
## overview of the required data and the outputs of this file.

####################################################################
####################################################################


# This script merges the replication data provided by Redmond and Regan
# with the candidate-level election results for recent general elections

library(dplyr)   # CRAN v1.0.4
library(rio)     # CRAN v0.5.16
library(stringr) # CRAN v1.4.0
library(tidyr)   # CRAN v1.1.2

# load data with general election results
complete_raw <- readRDS("data_elections_general_raw.rds")


# create relevant variables - mostly from regular expressions
# note: because dual mandate was abolished for these elections, no merging of TDs required

# District magnitude and quota: keep the first run of digits in each cell.
# str_extract() returns exactly one value per row (NA when nothing matches),
# so the result always lines up with the rows of complete_raw. The earlier
# str_extract_all() %>% unlist() changed length whenever a row had zero or
# several matches, which either errors on assignment or silently recycles.
# NOTE(review): if quota is stored with thousands separators, only the digits
# before the first separator are kept - confirm against the raw data.
complete_raw$dm <- as.numeric(stringr::str_extract(complete_raw$dm, "[0-9]+"))
complete_raw$quota <- as.numeric(stringr::str_extract(complete_raw$quota, "[0-9]+"))

# voter_info is parsed by stripping everything around the labelled fields
# "Electorate: ", "Turnout: ", "Valid: " and "Spoilt: "
complete_raw$eligible <- as.numeric(
  gsub("Electorate: |Turnout.*", "", complete_raw$voter_info)
)
complete_raw$votes_cast <- as.numeric(
  gsub(".*Turnout: |Valid.*", "", complete_raw$voter_info)
)
complete_raw$valid_votes <- as.numeric(
  gsub(".*Valid: |Spoilt.*", "", complete_raw$voter_info)
)
complete_raw$spoilt_votes <- as.numeric(
  gsub(".*Spoilt: ", "", complete_raw$voter_info)
)

# turnout in percent
complete_raw$turnout <- 100 * complete_raw$votes_cast / complete_raw$eligible

# Blank out implausible turnout values and the electorate figure they were
# computed from. The rows are identified ONCE, before turnout is overwritten:
# re-evaluating the condition after turnout has been set to NA yields NA for
# exactly the affected rows, so eligible would never be changed.
# which() also drops rows whose turnout is already NA.
implausible_turnout <- which(complete_raw$turnout < 1 | complete_raw$turnout > 100)
complete_raw$turnout[implausible_turnout] <- NA
complete_raw$eligible[implausible_turnout] <- NA
rm(implausible_turnout)

# extract IDs

# Election and constituency IDs are taken from the query parameters
# "elecid=<digits>" and "constitid=<digits>" in const_link.
# str_extract() yields one value per row (NA if the parameter is missing),
# which keeps the IDs aligned with the rows of complete_raw; flattening the
# list returned by str_extract_all() does not guarantee that.
complete_raw$election_id <- as.numeric(
  gsub("elecid=", "", stringr::str_extract(complete_raw$const_link, "elecid=[0-9]+"))
)
complete_raw$constit_id <- as.numeric(
  gsub("constitid=", "", stringr::str_extract(complete_raw$const_link, "constitid=[0-9]+"))
)

# diagnostic: number of rows per election ID
table(complete_raw$election_id)

# Build the candidate-level panel: split `name` into constituency and election,
# derive the election year, and look up each candidate's outcome in their next
# recorded election.
complete <- complete_raw %>%
  separate(name, into = c("constituency", "election"), sep = ":", remove = FALSE) %>%
  # first matching year wins (same precedence as checking 2007, 2011, 2016,
  # 2020 in turn); rows without any of these years get NA
  mutate(election_year = case_when(
    str_detect(election, "2007") ~ "2007",
    str_detect(election, "2011") ~ "2011",
    str_detect(election, "2016") ~ "2016",
    str_detect(election, "2020") ~ "2020",
    TRUE ~ NA_character_
  )) %>%
  arrange(candidate, election_year) %>%
  group_by(candidate) %>% # <-- add here how candidates are merged
  # Order the look-ahead by election year. Ordering by `candidate` (the
  # grouping variable) is constant within each group and therefore orders
  # nothing; the result then depends entirely on the preceding arrange().
  # NOTE(review): lead() picks the candidate's next *recorded* election, which
  # is not necessarily the next consecutive one - confirm this is intended.
  # NOTE(review): rows from the last election in the data have no later row,
  # so reelected and reran are 0 for them - confirm these rows should be kept.
  mutate(reelected = lead(elected, order_by = election_year),
         party_next = lead(party, order_by = election_year),
         constituency_next = lead(constituency, order_by = election_year)) %>%
  # no later row -> not re-elected; reran/reelected only count when the next
  # candidacy is in the same constituency
  mutate(reelected = replace(reelected, is.na(reelected), 0),
         reran = ifelse(!is.na(party_next), 1, 0),
         reran = ifelse(reran == 1 & constituency_next == constituency, 1, 0),
         reelected = ifelse(reelected == 1 & constituency_next == constituency, 1, 0)) %>%
  ungroup() %>%
  filter(!is.na(vote_margin))

# diagnostics
table(complete$reelected)

table(complete$election_year)


# replicate and extend Redmond and Regan's (2015) analysis of general elections
dat_redmondregan <- rio::import("data_redmond_regan_replication.xlsx") %>%
  as.data.frame()

# diagnostic: observations per election year.
# table() applied to a whole data frame cross-classifies ALL of its columns,
# which produces a huge multi-way array (or fails outright) instead of a
# readable summary; tabulate the year column like the other checks in this file.
table(dat_redmondregan$year)

# make sure the outcome is numeric before it is combined with the newer data
dat_redmondregan$nextwin <- as.numeric(dat_redmondregan$nextwin)

# Harmonise the recent-election data with Redmond and Regan's variable names:
#   nextwin <- reelected, share <- vote_margin, year <- election_year (numeric).
# Only these three variables plus the candidate identifier are kept, and only
# elections after 2007 are retained.
complete <- complete %>%
  transmute(nextwin = as.numeric(as.character(reelected)),
            share = vote_margin,
            year = as.numeric(election_year),
            candidate) %>%
  filter(year > 2007)

# Redmond and Regan's 2011 rows carry no information on rerunning and
# reelection, so they are dropped; afterwards only the variables shared with
# the recent-election data are kept.
dat_redmondregan <- dat_redmondregan %>%
  filter(year != 2011) %>%
  select(nextwin, share, year)

# stack the Redmond and Regan replication data on top of the recent elections
dat_general <- dat_redmondregan %>%
  bind_rows(complete) %>%
  ungroup()

# diagnostic: observations per election year in the combined data
table(dat_general$year)

# save dataset
saveRDS(dat_general, "data_general_elections_complete.rds")
